package com.chance.cc.crawler.development.bootstrap.douban;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.downloader.proxy.Proxy;
import com.chance.cc.crawler.core.filter.FilterInfo;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRecordFilter.*;
import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.*;

/**
 * Development bootstrap for the Douban keyword-search crawler.
 *
 * <p>Each public {@code *Record()} method builds a start {@link CrawlerRequestRecord} for the
 * {@code douban} domain and launches it through {@link DevCrawlerController} with results
 * printed to the console.
 *
 * @Author Zhao.Hhuan
 * @Date Create in 2021/3/30 13:28
 * @Description
 *      Douban keyword search
 **/
public class DoubanSearchKw {
    public static String domain = "douban";

    /** Site tag attached to every request record created by this class. */
    private static final String SITE = "searchKw";

    /** Site-business tag attached to every request record created by this class. */
    private static final String SITE_BIZ = "news-realtime";

    /** Browser User-Agent header sent by the keyword-search scenarios. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36";

    private static final Proxy proxy = new Proxy();
    static {
        // Proxy configuration.
        // NOTE(review): the proxy credentials are committed in source; they should be moved to
        // external configuration and rotated. Values are kept unchanged here to preserve behavior.
        proxy.setHost("http-dyn.abuyun.com");
        proxy.setPort(9020);
        proxy.setUsername("HEW657EL99F83S9D");
        proxy.setPassword("8916B1F3F10B1979");
    }

    /**
     * Entry point. The optional first argument selects the scenario to run:
     * {@code keyword} runs {@link #keywordRecord()}, {@code keywordOne} runs
     * {@link #keywordOneRecord()}; anything else (including no argument) runs
     * {@link #itemRecord()}, which was the previously hard-wired choice.
     *
     * @param args command-line arguments; only the first one is inspected
     */
    public static void main(String[] args) {
        String mode = (args != null && args.length > 0 && args[0] != null) ? args[0] : "";
        switch (mode) {
            case "keyword":
                keywordRecord();
                break;
            case "keywordOne":
                keywordOneRecord();
                break;
            default:
                itemRecord();
                break;
        }
    }

    /**
     * Launches the keyword-driven search: a start record that is itself neither downloaded nor
     * sent to the pipeline, accompanied by a support record that points at the keys endpoint.
     */
    public static void keywordRecord(){
        String url = "https://www.douban.com/search";

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(domain, CrawlerEnum.CrawlerRequestType.turnPage)
                .domain(domain)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(domain))
                .filter(key)
                .httpHead("User-Agent", USER_AGENT)
//                .httpHead("cookie","bid=WXYDCa8B2Ow")
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(domain))
//                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24,null))
                .needParsed(false)
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .proxy(proxy)
                .build();
        // The start record only carries configuration: skip its download and the result pipeline.
        crawlerRequestRecord.setDownload(false);
        crawlerRequestRecord.setSkipPipeline(true);
        applyBizTags(crawlerRequestRecord);
        // URL template stored as a request extra; presumably %s is replaced with each keyword
        // by the crawler script - confirm against the consumer of "searchKwSourceUrl".
        crawlerRequestRecord.getHttpRequest().addExtra("searchKwSourceUrl","https://www.douban.com/j/search?q=%s&start=1&cat=1015");

        attachCommentFilter(crawlerRequestRecord);

        // Support record requesting the keys endpoint for this domain.
        CrawlerRequestRecord keywordRecord = CrawlerRequestRecord.builder()
                .startPageRequest("douban_keyword",turnPageItem)
                .httpUrl("http://192.168.1.215:9599/v1/meta/"+domain+"/keys?site=meixin")
                .requestLabelTag(supportSource)
                .requestLabelTag(internalDownload)
                .build();

        DevCrawlerController.builder()
                .triggerInfo(domain,domain,System.currentTimeMillis(),domain)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(domain))
                .consoleResultPipeline() // console output
//                .fileResultPipeline("D:\\chance\\log\\tets.log",true) // file output
                .requestRecord(crawlerRequestRecord)
                .supportRecord(keywordRecord)
                .build()
                .start();
    }

    /**
     * Launches a search for one fixed, URL-encoded keyword, with a count-based turn-page filter
     * (current count 0, total 6) attached to the start record.
     */
    public static void keywordOneRecord(){
        String url = "https://www.douban.com/j/search?q=%E4%B8%81%E9%A6%99%E5%8C%BB%E7%94%9F&start=1&cat=1015";

        FilterInfo filterInfo = new FilterInfo();
        filterInfo.setFilter(count);
        filterInfo.setCurCount(0);
        filterInfo.setCountTotalNum(6);

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(domain, CrawlerEnum.CrawlerRequestType.turnPage)
                .domain(domain)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(domain))
                .filter(key)
                .httpHead("User-Agent", USER_AGENT)
                .turnPageFilterInfo(filterInfo)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(domain))
//                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24,null))
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .proxy(proxy)
                .build();
        applyBizTags(crawlerRequestRecord);

        attachCommentFilter(crawlerRequestRecord);

        launch(crawlerRequestRecord);
    }

    /**
     * Launches a single search request for a fixed, URL-encoded query without proxy and without
     * a custom User-Agent header.
     */
    public static void itemRecord(){
        // Alternative sample URLs used during development:
//        String url = "https://www.douban.com/note/206506698/";
//        String url = "https://www.douban.com/note/672183010/";
//        String url = "http://www.douban.com/note/206506698/";
//        String url = "http://www.douban.com/note/734445413/";
        String url = "https://www.douban.com/j/search?q=%E6%B0%B4%E6%BB%B4+%E8%8D%AF%E5%BA%97&start=1&cat=1015";

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(domain, CrawlerEnum.CrawlerRequestType.turnPage)
                .domain(domain)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(domain))
                .filter(key)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(domain))
//                .httpHead("User-Agent", USER_AGENT)
//                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24,null))
//                .needWashed(true)
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
//                .proxy(proxy)
                .build();
        applyBizTags(crawlerRequestRecord);

        attachCommentFilter(crawlerRequestRecord);

        launch(crawlerRequestRecord);
    }

    /** Adds the domain, site and site-business tags shared by every scenario to {@code record}. */
    private static void applyBizTags(CrawlerRequestRecord record) {
        record.tagsCreator().bizTags().addDomain(domain);
        record.tagsCreator().bizTags().addSite(SITE);
        record.tagsCreator().bizTags().addSiteBiz(SITE_BIZ);
    }

    /**
     * Serializes a key-filter record (memory filter keyed by
     * {@code filter-<domain>-<site>-queue}) to JSON and stores it on {@code record} under the
     * {@code comment_filter_record} category tag.
     */
    private static void attachCommentFilter(CrawlerRequestRecord record) {
        CrawlerRecord commentFilter = new CrawlerRequestRecord();
        commentFilter.setFilter(key);
        commentFilter.addFilterInfo(FilterUtils.memoryFilterKeyInfo(StringUtils.joinWith("-","filter",domain,SITE,"queue")));
//        commentFilter.addFilterInfo(FilterUtils.dateRangeFilterInfo(24 ,null));
        record.tagsCreator().resultTags().getCategoryTag().addKVTag("comment_filter_record", JSON.toJSONString(commentFilter));
    }

    /** Builds and starts a dev controller that runs {@code record} and prints results to the console. */
    private static void launch(CrawlerRequestRecord record) {
        DevCrawlerController.builder()
                .triggerInfo(domain,domain,System.currentTimeMillis(),domain)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(domain))
                .consoleResultPipeline() // console output
//                .fileResultPipeline("D:\\chance\\log\\tets.log",true) // file output
                .requestRecord(record)
                .build()
                .start();
    }
}
